library(phyloseq)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(decontam)
library(ggplot2)
library(vegan)
## Loading required package: permute
library(ggordiplots)
## Loading required package: glue
library(microbiome)
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:vegan':
##
## diversity
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(pheatmap)
library(cowplot)
library(RColorBrewer)
library(ggvegan)
library(ggrepel)
library(microbiomeMarker)
## Registered S3 method overwritten by 'gplots':
## method from
## reorder.factor DescTools
##
## Attaching package: 'microbiomeMarker'
## The following objects are masked from 'package:microbiome':
##
## abundances, aggregate_taxa
## The following object is masked from 'package:phyloseq':
##
## plot_heatmap
library(gllvm)
## Loading required package: TMB
##
## Attaching package: 'TMB'
## The following object is masked from 'package:microbiomeMarker':
##
## normalize
##
## Attaching package: 'gllvm'
## The following object is masked from 'package:vegan':
##
## ordiplot
## The following object is masked from 'package:stats':
##
## simulate
library(microbiomeutilities)
## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
##
## Attaching package: 'microbiomeutilities'
## The following object is masked from 'package:microbiome':
##
## add_refseq
library(MoMAColors)
## Registered S3 method overwritten by 'MoMAColors':
## method from
## print.palette DescTools
Quick access options for output graphics to make legends and axis labels larger / more legible
#Global Plot Options
plotopts<- theme(axis.text.y=element_text(size=20),axis.text.x=element_text(size=12),axis.title=element_text(size=20),strip.text=element_text(size=20),legend.title = element_text(size=20),legend.text = element_text(size=20))
#Smalller Legend Labs
plotopts2<- theme(axis.text.y=element_text(size=20),axis.text.x=element_text(size=12),axis.title=element_text(size=20),strip.text=element_text(size=20),legend.title = element_text(size=15),legend.text = element_text(size=12))
## Global Site Colors
sitecols<-c(brewer.pal(8,"Set2"),brewer.pal(9,"Paired")[9])
###### Stop Warnings turning into Errors
options(warn=1)
ps_woodchester<-readRDS('Woodchester Phyloseq Jun25 RID.Rdata')
############ DATA CLEANUP
#Prune Taxa With No Phylum Assignment
ps_prune<-prune_taxa(as.vector(!is.na(tax_table(ps_woodchester)[,2])),ps_woodchester)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ntaxa(ps_woodchester)-ntaxa(ps_prune)
## [1] 91
#Prune Chloroplasts
ps_prune_nochloro<-prune_taxa(as.vector(tax_table(ps_prune)[,4]!="Chloroplast"),ps_prune)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ntaxa(ps_prune)-ntaxa(ps_prune_nochloro)
## [1] 829
#Remove Mitochondria
ps_prune_nochloro_nomito<-prune_taxa(as.vector(tax_table(ps_prune_nochloro)[,5]!="Mitochondria"),ps_prune_nochloro)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Remove Archaea
ps_prune_nochloro_nomito_noarchaea<-prune_taxa(as.vector(tax_table(ps_prune_nochloro_nomito)[,1]!="Archea"),ps_prune_nochloro_nomito)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ntaxa(ps_woodchester) - ntaxa(ps_prune_nochloro_nomito_noarchaea)
## [1] 2604
#Inspect Library Sizes
df <- as.data.frame(sample_data(ps_prune_nochloro_nomito_noarchaea)) # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(ps_prune_nochloro_nomito_noarchaea)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
df$sample_or_control<-ifelse(df$sampletype %in% c("negative","negative_ve"),"negative","sample")
ggplot(data=df, aes(x=Index, y=LibrarySize,colour=sample_or_control)) + geom_point()
##Identify Contaminants by Prevalence
sample_data(ps_prune_nochloro_nomito_noarchaea)$is.neg <- sample_data(ps_prune_nochloro_nomito_noarchaea)$sampletype == "negative"
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
contamdf.prev <- isContaminant(ps_prune_nochloro_nomito_noarchaea, method="prevalence", neg="is.neg",threshold=0.6,normalize=T)
#How Many Contaminants?
table(contamdf.prev$contaminant) #722
##
## FALSE TRUE
## 8182 722
Seems to be one ASV present in 3 negatives at high abundance - but decontam thinks this is contamination from true samples. Probably because it is not in the other 3 negatives
# Make phyloseq object of presence-absence in negative controls and true samples
ps.pa <- transform_sample_counts(ps_prune_nochloro_nomito_noarchaea, function(abund) 1*(abund>0))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ps.pa.neg <- prune_samples(sample_data(ps.pa)$is.neg == TRUE, ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$is.neg == FALSE, ps.pa)
# Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf.prev$contaminant)
#Plot
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")
## Remove Contaminants
ps_clean<-prune_taxa(!contamdf.prev$contaminant,ps_prune_nochloro_nomito_noarchaea)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
##Remove Mocks and Negatives
table(sample_data(ps_clean)$sampletype)
##
## badger negative
## 167 6
ps_badger<-subset_samples(ps_clean,sampletype=="badger" & !Sample %in% c("A","B"))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
####### Read Threshold and Sample Prevalence Threshold
ps_clean_filter = filter_taxa(ps_badger, function(x) sum(x) > 10, TRUE)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#ps_clean_filter<-ps_badger
Now that we’ve done that, we can check what our post-QC library sizes are. It’s a good idea to report these in manuscripts and thesis chapters. Here was ask what the minimum, maximum and mean library sizes-per-samples are.
############## POST QC LIBRARY SIZES
###############
# Post-QC Library Stats
###############
mean(sample_sums(ps_clean_filter))
## [1] 56101.14
range(sample_sums(ps_clean_filter))
## [1] 13569 267515
badger_matrix<- t(as(otu_table(ps_clean_filter),"matrix"))
rarecurve(badger_matrix,step=50,cex=0.5)
## Warning in rarecurve(badger_matrix, step = 50, cex = 0.5): most observed count
## data have counts 1, but smallest count is 2
#Final Processing
#Only Badgers
#ps_badger01<-prune_samples(sample_data(ps_clean_filter)$day0_diagnosis!="0.5",ps_clean_filter)
ps_badger01<-ps_clean_filter
#Badgers
length(unique(sample_data(ps_clean_filter)$id))
## [1] 72
#Infection
table(sample_data(ps_clean_filter)$day0_cdp)
##
## 0.002162504 0.011800939 0.018212324 0.018355678 0.023783676 0.118358648
## 1 1 48 1 61 1
## 0.360305381 0.395608186 0.425203759 0.534689505 0.601465025 0.610032516
## 1 4 1 2 9 1
## 0.673103086 0.756314689 0.803003275 0.851805343 0.95890396 0.984162882
## 1 1 1 1 1 1
## 0.984493598 0.988353817 0.989785771 0.991890524 0.992220949 0.99554418
## 1 1 1 1 1 2
## 0.99653801 0.998535869 0.998934351 0.999090161 0.999281803 0.999386843
## 2 1 1 1 1 1
## 0.999743345 0.999822301 0.99983847 0.999925638 0.999983166 0.999999899
## 1 1 1 1 1 1
## 0.999999924 0.999999931 0.999999998 0.999999999 1
## 1 1 1 1 4
#Known Age
table(sample_data(ps_clean_filter)$age_fc)
##
## ADULT CUB
## 20 145
length(unique(sample_data(ps_clean_filter)$id[sample_data(ps_clean_filter)$age_fc =="CUB"]))
## [1] 62
##Extract Metadata
meta5<-as(sample_data(ps_clean_filter),"data.frame")
#Minimum Read Depth
min(sample_sums(ps_clean_filter))
## [1] 13569
#Rarefy
ps_rare<-rarefy_even_depth(ps_clean_filter,rngseed = 10112022,sample.size = 13569)
## `set.seed(10112022)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(10112022); .Random.seed` for the full vector
## ...
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## 21OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Richness
badger_rich<-estimate_richness(ps_rare,measures = c("Observed","Shannon"))
badger_rich$Sample<- sample_data(ps_rare)$Sample
badger_rich<-left_join(badger_rich,meta5,"Sample")
##Filter
table(badger_rich$social_group,useNA="ifany")
##
## BEECH BOXWOOD COLE PARK COLLIERS WOOD HONEYWELL
## 2 18 13 11 26
## KENNEL LARCH NETTLE OLD OAK PARK MILL
## 16 6 7 12 1
## SCOTLAND BANK SEPTIC TANK TOP SETT WEST WINDSOR EDGE
## 8 2 1 5 12
## WOOD FARM WOODRUSH YEW
## 13 2 10
badger_rich$year_social<-with(badger_rich,paste(capture_year,social_group,sep="_"))
year_social_table<-table( badger_rich$year_social)
#Filter to Only Cases Where there are 3 or more data points
year_social3<-names(year_social_table)[year_social_table>2]
badger_rich_filter3<-subset(badger_rich,year_social %in% year_social3)
tb_lab<-expression(paste(italic("M. bovis "), "Infection Probability"))
#Sample size
nrow(badger_rich)
## [1] 165
#All Badgers
richplot_tb1<-ggplot(badger_rich,aes(x=day0_cdp,y=Observed)) + geom_smooth(method="lm") + geom_point(size=5,shape=21,colour="white",fill=moma.colors("Levine2")[3]) + facet_grid(.~capture_year) + theme_bw(base_size = 15)
richplot_tb2<- richplot_tb1 + labs(x=tb_lab,y="Observed Bacterial \n Richness")
richplot_tb2
## `geom_smooth()` using formula = 'y ~ x'
ggsave2('All Badgers Continuous Tb Status by Year.pdf',richplot_tb2,width=22,height=10,units="cm")
## `geom_smooth()` using formula = 'y ~ x'
tb_richness_plot1<-plot_grid(richplot_tb_soc1,richplot_tb2,nrow=2,labels="AUTO",label_size = 22,rel_heights = c(2,1))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
tb_richness_plot1
ggsave2('All Badgers Richness Continuous Tb Combined Plot.pdf',tb_richness_plot1,width=20,height=22,units="cm")
ggsave2('Richness Tb Combined Plot.tiff',tb_richness_plot1,width=20,height=22,units="cm")
## Relevel COndition
badger_rich$condition<-factor(badger_rich$condition,levels=c("POOR","FAIR","GOOD","VERY GOOD"))
#Richness Plot by MASS
mass_plot1<-badger_rich %>% filter(!is.na(weight)) %>% ggplot(.,aes(x=weight,y=Observed)) + geom_smooth(method="lm") + geom_point(shape=21,aes(fill=condition),size=5,color="white",alpha=0.7) + theme_bw()
mass_plot2 <- mass_plot1+ theme(axis.text=element_text(size=20),axis.title = element_text(size=20),strip.text.x=element_text(size=20)) + labs(y="Observed Bacterial \n Richness",x="Mass (kg)")
mass_plot3<-mass_plot2 + facet_wrap(capture_year~.) + labs(fill="Condition") + scale_fill_brewer(palette = "Paired")
mass_plot3
## `geom_smooth()` using formula = 'y ~ x'
ggsave2('Mass by Year.pdf',mass_plot3,width=25,height=10,units="cm")
## `geom_smooth()` using formula = 'y ~ x'
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:microbiomeMarker':
##
## nsamples
## The following object is masked from 'package:phyloseq':
##
## nsamples
## The following object is masked from 'package:stats':
##
## ar
## Sex Variable
#badger_rich$sex<-ifelse(is.na(badger_rich$testes),"F","M")
with(badger_rich,table(reproc,testes,exclude = NULL))
## testes
## reproc ASCENDED DESCENDED MID <NA>
## LACTATING 0 0 0 9
## LACTATING SOMETIME 0 0 0 7
## LACTATING THIS YEAR 0 0 0 7
## NEVER LACTATING 0 0 0 47
## PREGNANT 0 0 0 1
## UNSURE 0 0 0 3
## <NA> 5 65 17 4
subset(badger_rich,is.na(reproc) & is.na(testes))
## Observed Shannon Sample sampletype location date.x tattoo_date
## 30 35 1.608767 13-2018 badger Woodchester 09/08/2016 39N 09/08/2016
## 71 51 2.378778 172-2018 badger Woodchester 09/08/2016 28A 09/08/2016
## 96 61 2.740949 29-2018 badger Woodchester 09/08/2016 39N 09/08/2016
## 107 84 2.936276 4-2018 badger Woodchester 10/08/2016 6C 10/08/2016
## moc social_group sett capture_year where weight toothwear condition
## 30 TRAP NETTLE NETTLE 2016 SETT 6.7 0.75 POOR
## 71 TRAP WINDSOR EDGE HOPPER 2016 SETT 8.8 0.50 GOOD
## 96 TRAP NETTLE NETTLE 2016 SETT 6.7 0.75 POOR
## 107 TRAP HONEYWELL BEND 2016 SETT 4.0 0.00 FAIR
## reproc testes sampledate_julian highest_cdp highest_cdp_time day0_cdp
## 30 <NA> <NA> 17021.96 days 0.99653801 0 0.99653801
## 71 <NA> <NA> 17021.96 days 0.02378368 0 0.02378368
## 96 <NA> <NA> 17021.96 days 0.99653801 0 0.99653801
## 107 <NA> <NA> 17022.96 days 0.02378368 -43 0.02378368
## date pm age_fc year_fc known_age id is.neg year_social
## 30 09/08/2016 No CUB 2009 7 ID22 FALSE 2016_NETTLE
## 71 09/08/2016 No CUB 2014 2 ID32 FALSE 2016_WINDSOR EDGE
## 96 09/08/2016 No CUB 2009 7 ID22 FALSE 2016_NETTLE
## 107 28/06/2016 No CUB 2016 1 ID27 FALSE 2016_HONEYWELL
##Filter Data to No Missing Values for Any Predictor
badger_rich_complete<-badger_rich[with(badger_rich,complete.cases(weight,capture_year,toothwear,social_group,day0_cdp)),]
##Summary Stats
nrow(badger_rich_complete) # 165 samples
## [1] 165
length(unique(badger_rich_complete$social_group)) #18 social groups (18 in BRMS model)
## [1] 18
length(unique(badger_rich_complete$capture_year)) #3 capture years
## [1] 3
length(unique(badger_rich_complete$id)) #72 badgers
## [1] 72
##Factor for Cap Year
badger_rich_complete$capture_year<-factor(badger_rich_complete$capture_year)
## Check Correlation Between Age and Mass
ggplot(badger_rich_complete,aes(x=known_age,y=weight)) + geom_smooth(method="lm",formula=y~x+I(x^2)) + geom_point(shape=21,size=5,fill="white")
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
with(badger_rich_complete,cor.test(known_age,weight))
##
## Pearson's product-moment correlation
##
## data: known_age and weight
## t = 1.6465, df = 143, p-value = 0.1019
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02721656 0.29289479
## sample estimates:
## cor
## 0.1363978
### All Data Plot
ggplot(badger_rich_complete,aes(x=known_age,y=Observed)) + geom_smooth(method="lm") + geom_point(shape=21,size=5,fill="white") + scale_x_continuous(n.breaks=11) + theme_bw() + plotopts + labs(y="Observed Richness",x="Age")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
with(badger_rich_complete,cor.test(known_age,Observed))
##
## Pearson's product-moment correlation
##
## data: known_age and Observed
## t = -2.3428, df = 143, p-value = 0.02052
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.34447089 -0.03019432
## sample estimates:
## cor
## -0.1922573
### Subset to Repeat Sampled Badgers
badger_rich_tab<-table(badger_rich_complete$id,badger_rich_complete$known_age)
badger_rich_tab2<-apply(badger_rich_tab,1,function(x)sum(x>0))
badger_rich_complete_min3<-subset(badger_rich_complete,id %in% names(badger_rich_tab2)[badger_rich_tab2>2])
table(badger_rich_complete_min3$id)
##
## ID11 ID12 ID13 ID2 ID23 ID8
## 4 3 6 8 4 7
#### PLOT
ggplot(badger_rich_complete_min3,aes(x=known_age,y=Observed,fill=id)) + geom_smooth(method="lm",se=F) + geom_point(shape=21,size=5) + theme_bw() + plotopts + labs(y="Observed Richness",x="Age") + scale_x_continuous(n.breaks=11) + facet_wrap(.~id) + guides(fill="none")
## `geom_smooth()` using formula = 'y ~ x'
#Fix Capture Year
badger_rich_complete$capture_year<-factor(badger_rich_complete$capture_year)
badger_rich_complete$toothwear<-as.factor(badger_rich_complete$toothwear)
#Sex
subset(badger_rich_complete,is.na(reproc) & is.na(testes))
## Observed Shannon Sample sampletype location date.x tattoo_date
## 30 35 1.608767 13-2018 badger Woodchester 09/08/2016 39N 09/08/2016
## 71 51 2.378778 172-2018 badger Woodchester 09/08/2016 28A 09/08/2016
## 96 61 2.740949 29-2018 badger Woodchester 09/08/2016 39N 09/08/2016
## 107 84 2.936276 4-2018 badger Woodchester 10/08/2016 6C 10/08/2016
## moc social_group sett capture_year where weight toothwear condition
## 30 TRAP NETTLE NETTLE 2016 SETT 6.7 0.75 POOR
## 71 TRAP WINDSOR EDGE HOPPER 2016 SETT 8.8 0.5 GOOD
## 96 TRAP NETTLE NETTLE 2016 SETT 6.7 0.75 POOR
## 107 TRAP HONEYWELL BEND 2016 SETT 4.0 0 FAIR
## reproc testes sampledate_julian highest_cdp highest_cdp_time day0_cdp
## 30 <NA> <NA> 17021.96 days 0.99653801 0 0.99653801
## 71 <NA> <NA> 17021.96 days 0.02378368 0 0.02378368
## 96 <NA> <NA> 17021.96 days 0.99653801 0 0.99653801
## 107 <NA> <NA> 17022.96 days 0.02378368 -43 0.02378368
## date pm age_fc year_fc known_age id is.neg year_social
## 30 09/08/2016 No CUB 2009 7 ID22 FALSE 2016_NETTLE
## 71 09/08/2016 No CUB 2014 2 ID32 FALSE 2016_WINDSOR EDGE
## 96 09/08/2016 No CUB 2009 7 ID22 FALSE 2016_NETTLE
## 107 28/06/2016 No CUB 2016 1 ID27 FALSE 2016_HONEYWELL
badger_rich_complete$sex<-ifelse(is.na(badger_rich_complete$testes),"F","M")
badger_rich_complete$sex[badger_rich_complete$id=="ID27"]<-"M"
badger_rich_complete$sex[badger_rich_complete$id=="ID32"]<-"F"
## Subset to Known Age
badger_rich_complete_age<-subset(badger_rich_complete,!is.na(known_age))
#Sample Size
nrow(badger_rich_complete_age)
## [1] 145
length(unique(badger_rich_complete_age$id))
## [1] 62
length(unique(badger_rich_complete_age$social_group))
## [1] 18
library(brms)
#Model For Each Outcome
bf_rich<-bf(Observed ~ capture_year + known_age + I(known_age^2) + sex + day0_cdp + (1|p|social_group) + (1|q|id)) + negbinomial()
bf_mass<-bf(weight ~ capture_year + known_age + I(known_age^2) + sex + day0_cdp + (1|p|social_group) + (1|q|id)) + gaussian()
fit1 <- brm(
bf_rich + bf_mass + set_rescor(FALSE),
data = badger_rich_complete_age, chains = 4, cores = 4,
control = list(adapt_delta = 0.95),save_pars = save_pars(all = TRUE),
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## 679 | #include <cmath>
## | ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
summary(fit1)
## Family: MV(negbinomial, gaussian)
## Links: mu = log; shape = identity
## mu = identity; sigma = identity
## Formula: Observed ~ capture_year + known_age + I(known_age^2) + sex + day0_cdp + (1 | p | social_group) + (1 | q | id)
## weight ~ capture_year + known_age + I(known_age^2) + sex + day0_cdp + (1 | p | social_group) + (1 | q | id)
## Data: badger_rich_complete_age (Number of observations: 145)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Multilevel Hyperparameters:
## ~id (Number of levels: 62)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Observed_Intercept) 0.13 0.05 0.02 0.22
## sd(weight_Intercept) 0.83 0.26 0.25 1.31
## cor(Observed_Intercept,weight_Intercept) 0.23 0.38 -0.54 0.92
## Rhat Bulk_ESS Tail_ESS
## sd(Observed_Intercept) 1.00 872 836
## sd(weight_Intercept) 1.00 853 791
## cor(Observed_Intercept,weight_Intercept) 1.00 996 1103
##
## ~social_group (Number of levels: 18)
## Estimate Est.Error l-95% CI u-95% CI
## sd(Observed_Intercept) 0.04 0.04 0.00 0.13
## sd(weight_Intercept) 0.54 0.33 0.03 1.25
## cor(Observed_Intercept,weight_Intercept) -0.06 0.58 -0.96 0.93
## Rhat Bulk_ESS Tail_ESS
## sd(Observed_Intercept) 1.00 1934 2246
## sd(weight_Intercept) 1.01 900 1408
## cor(Observed_Intercept,weight_Intercept) 1.00 1111 2213
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Observed_Intercept 4.19 0.09 4.00 4.37 1.00 3917
## weight_Intercept 6.29 0.57 5.22 7.42 1.00 3177
## Observed_capture_year2017 0.04 0.06 -0.08 0.15 1.00 5004
## Observed_capture_year2018 -0.09 0.09 -0.27 0.08 1.00 4161
## Observed_known_age -0.09 0.05 -0.19 0.01 1.00 3519
## Observed_Iknown_ageE2 0.01 0.01 -0.00 0.02 1.00 3779
## Observed_sexM -0.04 0.07 -0.17 0.09 1.00 5120
## Observed_day0_cdp 0.06 0.08 -0.10 0.22 1.00 5222
## weight_capture_year2017 0.62 0.33 -0.02 1.26 1.00 4107
## weight_capture_year2018 0.25 0.49 -0.75 1.21 1.00 3727
## weight_known_age 0.66 0.30 0.07 1.24 1.00 3013
## weight_Iknown_ageE2 -0.08 0.03 -0.14 -0.02 1.00 3456
## weight_sexM 1.55 0.37 0.81 2.30 1.00 3901
## weight_day0_cdp 0.33 0.48 -0.62 1.25 1.00 5347
## Tail_ESS
## Observed_Intercept 3026
## weight_Intercept 2989
## Observed_capture_year2017 3256
## Observed_capture_year2018 3276
## Observed_known_age 3064
## Observed_Iknown_ageE2 2846
## Observed_sexM 3630
## Observed_day0_cdp 3451
## weight_capture_year2017 2656
## weight_capture_year2018 3084
## weight_known_age 2199
## weight_Iknown_ageE2 2332
## weight_sexM 2639
## weight_day0_cdp 3363
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape_Observed 16.47 3.29 10.82 23.72 1.00 1634 2433
## sigma_weight 1.51 0.12 1.29 1.76 1.00 1807 2483
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
conditional_effects(fit1)
bayes_R2(fit1)
## Estimate Est.Error Q2.5 Q97.5
## R2Observed 0.2714016 0.08475793 0.1116479 0.4307328
## R2weight 0.4484407 0.06753946 0.3046351 0.5657795
pp_check(fit1,resp='Observed')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
pp_check(fit1,resp='weight')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
#What Are the Names of the most abundant phyla?
physeq_phylumcollapse<- ps_rare %>% microbiome::aggregate_taxa(level="Phylum")
physeq_top5phyla = names(sort(taxa_sums(physeq_phylumcollapse), TRUE)[1:5])
physeq_top5phyla
## [1] "Firmicutes" "Proteobacteria" "Fusobacteriota" "Bacteroidota"
## [5] "Campylobacterota"
#Subset the phyloseq object to those phyla
physeq_top5phylum_filter<-subset_taxa(ps_rare,Phylum %in% physeq_top5phyla)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Sample Size
phyloseq::nsamples(physeq_top5phylum_filter)
## [1] 165
#Remake Our Graph but with no grouping (samples)
physeq_top5phylum_samples_plot <- physeq_top5phylum_filter %>%
microbiome::aggregate_taxa(level = "Phylum") %>%
microbiome::transform(transform = "compositional") %>%
plot_composition(sample.sort = "Firmicutes")
physeq_top5phylum_samples_plot
#Remake Our Graph but with averaging by YEAR
physeq_top5phylum_year_plot <- physeq_top5phylum_filter %>%
microbiome::aggregate_taxa(level = "Phylum") %>%
microbiome::transform(transform = "compositional") %>%
plot_composition(sample.sort = "Firmicutes", average_by = "capture_year") + scale_fill_manual(values = moma.colors("Levine2", direction=-1)) + theme_bw() + labs(fill="Bacterial Phylum",x="Capture Year") + plotopts2 + theme(axis.text.x = element_text(angle=45,hjust=1, size = 14), axis.text.y = element_text(size = 14)) + guides(fill="none")
physeq_top5phylum_year_plot
ggsave2('Badger Barplot by Year.pdf',physeq_top5phylum_year_plot,width=10,height=15,units="cm")
ggsave2('Badger Barplot by Year.tiff',physeq_top5phylum_year_plot,width=10,height=15,units="cm")
#Combined plot
abundance_plots<-plot_grid(physeq_top5phylum_social_plot,physeq_top5phylum_year_plot,nrow=1,labels=c("A", "B"), label_size = 25)
abundance_plots
ggsave("Combined_Badger_Barplots.png", abundance_plots, width = 30, height = 20, units = "cm")
ggsave("Combined_Badger_Barplots.pdf", abundance_plots, width = 30, height = 20, units = "cm")
#Sample for Stripping out the ASV matrix from a phyloseq object (run all of this)
vegan_otu <- function(physeq) {
OTU <- otu_table(physeq)
if (taxa_are_rows(OTU)) {
OTU <- t(OTU)
}
return(as(OTU, "matrix"))
}
#Extract Matrix and Sample Data - Same Dataset for Richness
ps_badger_forclr<-prune_samples(sample_data(ps_badger01)$Sample %in% badger_rich_complete_age$Sample,ps_badger01)
ps_clr<-microbiome::transform(ps_badger_forclr,"clr")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
woodchester_clr_v<-vegan_otu(ps_clr)
woodchester_clr_s<-as(sample_data(ps_clr),"data.frame")
woodchester_clr_s$sex<-badger_rich_complete_age$sex[match(woodchester_clr_s$Sample,badger_rich_complete_age$Sample)]
### Sample Sizes for PERMANOVA
nrow(woodchester_clr_s)
## [1] 145
length(unique(woodchester_clr_s$social_group))
## [1] 18
length(unique(woodchester_clr_s$id))
## [1] 62
table(woodchester_clr_s$capture_year)
##
## 2016 2017 2018
## 49 70 26
mean(complete.cases(woodchester_clr_s[,c("known_age","day0_cdp","social_group","sex","capture_year")])) #all complete data
## [1] 1
woodchester_clr_s$social_group<-factor(woodchester_clr_s$social_group)
#Fit Model
badger_clr_perm<-adonis2(woodchester_clr_v ~ social_group + known_age +sex + day0_cdp + factor(capture_year) + factor(id),data=woodchester_clr_s,method="euclidean",by="term")
badger_clr_perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = woodchester_clr_v ~ social_group + known_age + sex + day0_cdp + factor(capture_year) + factor(id), data = woodchester_clr_s, method = "euclidean", by = "term")
## Df SumOfSqs R2 F Pr(>F)
## social_group 17 29952 0.16459 1.5707 0.001 ***
## known_age 1 2244 0.01233 2.0001 0.001 ***
## sex 1 2102 0.01155 1.8739 0.003 **
## day0_cdp 1 1045 0.00574 0.9318 0.583
## factor(capture_year) 2 4369 0.02401 1.9473 0.001 ***
## factor(id) 48 59265 0.32566 1.1007 0.032 *
## Residual 74 83009 0.45613
## Total 144 181987 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############# CLR Transform Ordination
library(ggordiplots)
##PCA In Vegan
grouptab<-table(woodchester_clr_s$social_group)
group5<-names(grouptab)[grouptab>4]
ps_badger_orddata<-prune_samples(sample_data(ps_badger_forclr)$social_group %in% group5,ps_badger_forclr) #122
ps_orddata_clr<-microbiome::transform(ps_badger_orddata,"clr")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
woodchester_group5_clr_v<-vegan_otu(ps_orddata_clr)
woodchester_group5_clr_s<-as(sample_data(ps_orddata_clr),"data.frame")
### Sample Sizes for ORDINATION
nrow(woodchester_group5_clr_s)
## [1] 131
length(unique(woodchester_group5_clr_s$social_group))
## [1] 11
length(unique(woodchester_group5_clr_s$id))
## [1] 54
table(woodchester_group5_clr_s$capture_year)
##
## 2016 2017 2018
## 43 66 22
#Run PCA
woodchester_group5_clr_pca<-rda(woodchester_group5_clr_v)
#Inspect Plot
gg_ordiplot(woodchester_group5_clr_pca,groups=woodchester_group5_clr_s$social_group,spiders = TRUE)
#Extract for GGPLot2
social_ggord_data<-gg_ordiplot(woodchester_group5_clr_pca,groups=woodchester_group5_clr_s$social_group,spiders = TRUE,plot=FALSE)
social_ggord_data2<-social_ggord_data$df_ord
social_ggord_data2$capture_year<-woodchester_group5_clr_s$capture_year
#Add Social Group to DF SPIDERS
social_ggord_data2$df_spiders$Sample<-rownames(social_ggord_data2$df_spiders)
social_ggord_data2$df_spiders$capture_year<-woodchester_group5_clr_s$capture_year[match(social_ggord_data2$df_spiders$Sample,woodchester_group5_clr_s$Sample)]
#Plot
badger_social_clr_plot1<-ggplot() + geom_segment(data=social_ggord_data$df_spiders, aes(x=cntr.x, xend=x, y=cntr.y, yend=y, color=Group), show.legend = FALSE)
#Plot Points
badger_social_clr_plot2<- badger_social_clr_plot1 + geom_point(data=social_ggord_data2,aes(x=x,y=y,fill=Group),shape=21,color="white",size=5,alpha=0.8) + theme_bw()
#Colour
badger_social_clr_plot3<- badger_social_clr_plot2 + labs(x="PC1 (9.3%)",y="PC2 (8.5%)",fill="Site") + plotopts + theme(legend.text = element_text(size=12), axis.text.y = element_text(size = 14), axis.text.x = element_text(size = 14)) + labs(fill="Social Group") + scale_fill_manual(values = moma.colors("Warhol")) + scale_color_manual(values = moma.colors("Warhol"))
#Add Ellipses
badger_social1617_clr_plot4<- badger_social_clr_plot3 + geom_path(data = social_ggord_data$df_ellipse, aes(x = x, y = y, color = Group), show.legend = FALSE)
badger_social1617_clr_plot4
ggsave2('CLR Beta Diversity Group5.pdf',badger_social1617_clr_plot4,width=25,height=10,units="cm")
ggsave2('CLR Beta Diversity Group5.png',badger_social1617_clr_plot4,width=25,height=15,units="cm")
### All Badger CLR
ps_all_clr<-microbiome::transform(ps_clean_filter,"clr")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ps_all_clr_v<-vegan_otu(ps_all_clr)
ps_all_clr_s<-as(sample_data(ps_all_clr),"data.frame")
ps_all_ord<-rda(ps_all_clr_v)
ps_all_scores<-scores(ps_all_ord)$sites
ps_all_scores2<-cbind(ps_all_scores,ps_all_clr_s)
ps_all_scores2$Sample<-rownames(ps_all_scores2)
ps_all_scores2$observed_richness<-badger_rich$Observed[match(ps_all_scores2$Sample,badger_rich$Sample)]
#Subset
ps_all_scores3<-subset(ps_all_scores2,!is.na(known_age))
#Sample Size
nrow(ps_all_scores3)
## [1] 145
length(unique(ps_all_scores3$id))
## [1] 62
#### Dynamics with Age for Repeat Sampled Badger
badger_beta_tab<-table(ps_all_scores2$id,ps_all_scores2$known_age)
badger_beta_tab2<-apply(badger_beta_tab,1,function(x)sum(x>0))
badger_beta_complete_min3<-subset(ps_all_scores2,id %in% names(badger_beta_tab2)[badger_beta_tab2>2])
table(badger_beta_complete_min3$id)
##
## ID11 ID12 ID13 ID2 ID23 ID8
## 4 3 6 8 4 7
#### PLOT
pc1_age_plot1<-ggplot(badger_beta_complete_min3,aes(x=known_age,y=PC1,fill=id)) + geom_smooth(method="lm",se=F) + geom_point(shape=21,size=5) + theme_bw() + plotopts + labs(y="Beta Diversity (PC1)",x="Age") + scale_x_continuous(n.breaks=11) + facet_wrap(.~id) + guides(fill="none") + scale_fill_manual(values=MoMAColors::moma.colors("Levine2",6)) + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.text.y = element_text(size = 14))
pc1_age_plot1
## `geom_smooth()` using formula = 'y ~ x'
#### Dynamics with Age for Repeat Sampled Badger
badger_rich_tab<-table(badger_rich$id,badger_rich$known_age)
badger_rich_tab2<-apply(badger_rich_tab,1,function(x)sum(x>0))
badger_rich_complete_min3<-subset(badger_rich,id %in% names(badger_rich_tab2)[badger_rich_tab2>2])
table(badger_rich_complete_min3$id)
##
## ID11 ID12 ID13 ID2 ID23 ID8
## 4 3 6 8 4 7
#### PLOT
rich_age_plot1<-ggplot(badger_rich_complete_min3,aes(x=known_age,y=Observed,fill=id)) + geom_smooth(method="lm",se=F) + geom_point(shape=21,size=5) + theme_bw() + plotopts + labs(y="Alpha Diversity (Richness)",x="Age") + scale_x_continuous(n.breaks=11) + facet_wrap(.~id) + guides(fill="none") + scale_fill_manual(values=MoMAColors::moma.colors("Levine2",6))+ theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.text.y = element_text(size = 14))
rich_age_plot1
## `geom_smooth()` using formula = 'y ~ x'
#age_rich3,temporal_age_plot2,
#age_grid1<-plot_grid(rich_age_plot1,pc1_age_plot1,nrow=1,labels="AUTO",label_size = 25)
#ggsave2('Age Dynamics Plot.pdf',age_grid1,width=28,height=12,units="cm")
#ggsave2('Age Dynamics Plot.png',age_grid1,width=28,height=12,units="cm")
#Known Age
ps_age<-prune_samples(sample_data(ps_rare)$id %in% ps_all_scores3$id & !is.na(sample_data(ps_rare)$known_age),ps_rare)
#Filter to Top 5 Phyla
ps_age_top5phylum_filter<-subset_taxa(ps_age,Phylum %in% physeq_top5phyla)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Plot Data
ps_age_top5phylum_data <- ps_age_top5phylum_filter %>%
microbiome::aggregate_taxa(level = "Phylum") %>%
microbiome::transform(transform = "compositional") %>% plot_composition(sample.sort = "known_age")
#Copy Across ID
ps_age_top5phylum_data2<-ps_age_top5phylum_data$data
ps_age_top5phylum_data2$id<-sample_data(ps_age_top5phylum_filter)$id[match(ps_age_top5phylum_data2$Sample,sample_data(ps_age_top5phylum_filter)$Sample)]
ps_age_top5phylum_data2$age<-sample_data(ps_age_top5phylum_filter)$known_age[match(ps_age_top5phylum_data2$Sample,sample_data(ps_age_top5phylum_filter)$Sample)]
#Plot
ps_age_top5phylum_plot <- ggplot(ps_age_top5phylum_data2, aes(x = xlabel, y = Abundance, fill = Tax)) + scale_x_discrete(breaks = ps_age_top5phylum_data2$xlabel,labels=as.character(ps_age_top5phylum_data2$age)) + geom_bar(position = "stack", stat = "identity") + facet_wrap(~id, scales = "free") + theme_bw() + scale_fill_manual(values = moma.colors("Levine2", direction = -1)) + labs(fill="Bacterial Phylum",x="Age")
ps_age_top5phylum_plot
ggsave2('Badger Barplot by Age.pdf',ps_age_top5phylum_plot,width=30,height=30,units="cm")
ggsave2('Badger Barplot by Age.tiff',ps_age_top5phylum_plot,width=30,height=30,units="cm")
Uses A different dataset filtered to known infection badgers
#Subset To Individualls with Multiuple entires
woodchester_temp_table<-table(ps_all_scores2$id)
woodchester_scores_sub<-subset(ps_all_scores2,id %in% names(woodchester_temp_table)[woodchester_temp_table>2])
temporal_plot1<-ggplot(woodchester_scores_sub,aes(x=sampledate_julian,y=PC1)) + geom_smooth(method="lm",se=F)+ geom_point(size=5,shape=21,aes(fill=day0_cdp),color="gray77")
temporal_plot2<- temporal_plot1+ facet_wrap(.~id) + labs(x= "Julian Day",fill="M. bovis Infection Probability") + theme_bw() + theme(axis.text=element_text(size=15),strip.text.x=element_text(size=15),axis.title=element_text(size=15),legend.position = "top",axis.text.x=element_text(angle=90,hjust=1))
temporal_plot3<- temporal_plot2 + scale_fill_moma_c("Exter")
temporal_plot3
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
## `geom_smooth()` using formula = 'y ~ x'
ggsave2('Infection PC1 Temporal Plot.pdf',temporal_plot3,height=18,width=20,units="cm")
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
## `geom_smooth()` using formula = 'y ~ x'
ggsave2('Infection PC1 Temporal Plot.tiff',temporal_plot3,height=18,width=20,units="cm")
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## OTUs Top 50
badger_genus<-aggregate_top_taxa2(ps_badger_forclr, "Genus", top = 50)
clr_scaled <-microbiome::transform(badger_genus, transform = "clr")
#Extract
ys <- data.frame(t(otu_table(clr_scaled)))
names(ys) <-taxa_names(clr_scaled)
#Predictors
Xs<-data.frame(sample_data(clr_scaled)) %>% select(id,day0_cdp,known_age,social_group,capture_year)
Xs$sex<-badger_rich_complete_age$sex[match(rownames(Xs),badger_rich_complete_age$Sample)]
Xs$capture_year<-factor(Xs$capture_year)
#Define Study Design
sDesign<-data.frame(id = Xs$id)
## Model
fit_reduced_scaled <- gllvm(ys, Xs,
num.lv = 2,
formula = ~ capture_year + day0_cdp + known_age + sex,
family = "gaussian",
row.eff = ~(1|id),starting.val='random', studyDesign = sDesign)
## Warning in gllvm(ys, Xs, num.lv = 2, formula = ~capture_year + day0_cdp + : There are rows full of zeros in y.
coefplot(fit_reduced_scaled)
## Model
fit_reduced_scaled2 <- gllvm(ys, Xs,
num.lv = 2,
formula = ~ capture_year + sex * day0_cdp + known_age,
family = "gaussian",
row.eff = ~(1|id),starting.val='random', studyDesign = sDesign)
## Warning in gllvm(ys, Xs, num.lv = 2, formula = ~capture_year + sex * day0_cdp + : There are rows full of zeros in y.
#Estimates
df<-coef(fit_reduced_scaled2)
est_df<-data.frame(df$Intercept)
est_df2<-data.frame(df$Xcoef)
est_df3<-merge(est_df, est_df2, by = 0)
#Order genera
row.names(est_df3)<-est_df3$Row.names
est_df3<-est_df3[colnames(ys),]
names(est_df3)[1]<- "Genus"
names(est_df3)[2]<- "Intercept"
### COnfidence Intervals
confint_df<-data.frame(confint(fit_reduced_scaled2))
#Identify Rows with Main Effects
one_comma<-sapply(rownames(confint_df),function(x) length(gregexpr(":", x, fixed = TRUE)[[1]]))==1
###Strip Out Individual Datasets
##CDP
cdp_main<-grepl("^Xcoef.day0_cdp",rownames(confint_df))
int_data_day0cdp<-cbind(est_df3[,c("Genus","day0_cdp")],confint_df[cdp_main+one_comma==2,])
##Sex
sex_main<-grepl("^Xcoef.sexM",rownames(confint_df))
int_data_sexM<-cbind(est_df3[,c("Genus","sexM")],confint_df[sex_main+one_comma==2,])
##Age
age_main<-grepl("^Xcoef.known_age",rownames(confint_df))
int_data_age<-cbind(est_df3[,c("Genus","known_age")],confint_df[age_main+one_comma==2,])
# ##Tb Sex Interaction
int_data_sex_day0<-cbind(est_df3[,c("Genus","sexM.day0_cdp")],confint_df[grep("Xcoef.sexM:day0_cdp",rownames(confint_df)),])
# ##Age: Tb Interaction
# int_data_age_tb<-cbind(est_df3[,c("Genus","day0_cdp.known_age")],confint_df[grep("Xcoef.day0_cdp:known_age",rownames(confint_df)),])
#
#Extra Variables and Rename Columns
colnames(int_data_day0cdp)<-c("Genus","Estimate","l95","u95")
colnames(int_data_sexM)<-c("Genus","Estimate","l95","u95")
colnames(int_data_age)<-c("Genus","Estimate","l95","u95")
# colnames(int_data_age_tb)<-c("Genus","Estimate","l95","u95")
colnames(int_data_sex_day0)<-c("Genus","Estimate","l95","u95")
int_data_day0cdp$trait<-"Infection Probability"
int_data_sexM$trait<-"sex Male"
int_data_age$trait<-"Age"
# int_data_age_tb$trait<-"Age:Infection"
int_data_sex_day0$trait<-"Male:Infection"
tb_mod_plotdata<-rbind(int_data_day0cdp,int_data_sexM,int_data_age,int_data_sex_day0)
#Order
tb_mod_plotdata$trait<-factor(tb_mod_plotdata$trait,levels=c("sex Male","Age","Infection Probability","Male:Infection"))
tb_mod_plotdata2<- tb_mod_plotdata %>% group_by(trait) %>% arrange(Estimate,.by_group=T)
tb_mod_plotdata2$Genus<-factor(tb_mod_plotdata2$Genus,levels=unique(tb_mod_plotdata2$Genus))
#Significance
tb_mod_plotdata2$Sig<- !data.table::between(0, tb_mod_plotdata2$l95, tb_mod_plotdata2$u95)
sig_col<-moma.colors("Levine2")[6]
##Subset
tb_mod_plotdata_sigtab<-with(tb_mod_plotdata2,table(Genus,Sig))
tb_mod_plotdata_sigsubset<-subset(tb_mod_plotdata2,Genus %in% rownames(tb_mod_plotdata_sigtab)[tb_mod_plotdata_sigtab[,2]>0])
### Plot
tb_plot1<-ggplot(tb_mod_plotdata_sigsubset,aes(x=Estimate,y=Genus)) + geom_errorbarh(aes(xmin=l95,xmax=u95,color=Sig),linewidth = 1.2,alpha=0.7) + geom_point(size=7,shape=21,color="gray40",aes(fill=Sig),alpha=0.7)
tb_plot2<- tb_plot1 + theme_bw(base_size = 20) + geom_vline(xintercept=0,linetype="dashed") + scale_color_manual(values=c("gray77",sig_col)) + scale_fill_manual(values=c("white",sig_col)) + guides(fill="none",color="none") + facet_wrap(.~trait,scales = "free_x", nrow = 1) +
theme(
axis.text = element_text(size = 22),
axis.title = element_text(size = 26),
strip.text = element_text(size = 28))
tb_plot2
#ggsave('GLLVM plot.tiff', tb_plot2, width = 25, height = 20)
#### Correlations
cr1<-data.frame(getResidualCor(fit_reduced_scaled2))#
names(cr1)<-names(ys)
names(cr1)<-abbreviate(names(cr1), minlength = 15)
rownames(cr1)<-abbreviate(rownames(cr1), minlength = 15 )
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
#devtools::install_github("kassambara/ggcorrplot")
library(ggcorrplot)
##
## Attaching package: 'ggcorrplot'
## The following object is masked from 'package:rstatix':
##
## cor_pmat
#install.packages("ggpubr")
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
cr2<-cor_pmat(cr1)
corplot<-ggcorrplot(cr1,
hc.order = TRUE,
outline.col = "white",
type = "full",
ggtheme = ggplot2::theme_minimal(base_size = 10),
tl.cex = 12,
p.mat = cr2,
sig.level = 0.05,
lab_size = 30,
#show.diag = F,
insig = "blank",
# colors = c("#6D9EC1", "white", "#E46726"))
colors = c("blue", "white", "red"))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5, size = 20), axis.text.y = element_text(size = 20),legend.text = element_text(size=20),legend.title = element_text(size=25),legend.key.size = unit(1, 'cm'))+
theme(plot.margin=unit(c(0.2,0.2,0.2,2),"cm"))
corplot
#ggsave('Correlation plot.tiff', corplot, width = 25, height = 20)
## Save Grid
tb_mod_combined1<-ggarrange(tb_plot2,corplot,nrow=2,heights=c(0.8,1),widths=c(1,1),align="h",labels="AUTO",font.label=list(size=50))
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
tb_mod_combined1
cowplot::ggsave2('Tb Combined Model Outputs.png',tb_mod_combined1,width=20,height=25)
cowplot::ggsave2('Tb Combined Model Outputs.pdf',tb_mod_combined1,width=20,height=25)
# tb_mod_combined2<-ggarrange(tb_plot3,corplot,ncol=1,heights=c(0.8,1),widths=c(0.75,1),align="v",labels="AUTO",font.label=list(size=30))
# tb_mod_combined2
# cowplot::ggsave2('Tb Combined Model Outputs Column.pdf',tb_mod_combined2,width=12,height=20)
#
5.2.1 Social Group